## Code for Captured Investment

## for reproduction jump to line 628 to read-in provided datasets

## loading required packages

library(plm)
library(ggplot2)
library(dplyr)
library(arsenal)
library(tidyr)
library(plyr)
library(stringr)
library(car)
library(MASS)
library(zoo)
library(mapdata)
library(viridis)
library(rnaturalearth)
library(rnaturalearthdata)
library(stargazer)
library(lmtest)
library(ggpubr)
library(gtsummary)

## read in V-Dem Vers 12 dataset

## Load the V-Dem file chosen interactively and keep the rows with
## year > 1989 (rows with a missing year are dropped as well).
V_Dem_data <- read.csv(file.choose(), header = TRUE,
                       stringsAsFactors = FALSE, na.strings = "NA",
                       skipNul = TRUE)
V_Dem_data <- V_Dem_data[which(V_Dem_data$year > 1989), , drop = FALSE]

## The first column is renamed by position to serve as the join key.
names(V_Dem_data)[1] <- "CountryName"

## Keep only the variables used further down.
V_Dem_data <- V_Dem_data[, c("CountryName", "year", "v2x_neopat",
                             "e_regionpol", "e_pop", "e_gdp", "e_gdppc",
                             "e_peaveduc", "v2x_regime", "e_wbgi_gee"),
                         drop = FALSE]

## Column 3 (v2x_neopat) and column 7 (e_gdppc) get the short names used
## in the rest of the script.
names(V_Dem_data)[c(3, 7)] <- c("NEOPAT", "GDPpc")

## Fill gaps in e_wbgi_gee separately for every country:
##   1. linear interpolation between observed values,
##   2. remaining leading/trailing gaps carried from the nearest observation.
##
## dplyr::mutate() is called explicitly: plyr is attached after dplyr at the
## top of this script, so a bare mutate() resolves to plyr::mutate(), which
## ignores group_by() and would interpolate across country boundaries.
## Assumes rows are ordered by year within each country -- TODO confirm.
V_Dem_data <- V_Dem_data %>%
  group_by(CountryName) %>%
  dplyr::mutate(
    ## na.approx() needs two observed points; countries with fewer are left
    ## as they are and handled (where possible) by fill() below.
    e_wbgi_gee = if (sum(!is.na(e_wbgi_gee)) >= 2) {
      na.approx(e_wbgi_gee, na.rm = FALSE)
    } else {
      e_wbgi_gee
    }
  ) %>%
  ## A single "downup" pass already closes every gap inside a country, so the
  ## former second "updown" pass was a no-op and has been dropped.
  fill(e_wbgi_gee, .direction = "downup") %>%
  ## Drop the grouping so it cannot leak into later steps.
  ungroup()


## read in SWIID Gini

## load() restores the objects stored in the .rda file into the workspace and
## returns their names (kept in SWIID_data). The script relies on the file
## providing an object called swiid_summary.
SWIID_data <- load("~/LSE/Dissertation/Hand in/downloaded datasets/swiid9_3.rda")

## Positional renames: column 1 becomes the country key, column 5 the Gini
## measure used later on.
names(swiid_summary)[c(1, 5)] <- c("CountryName", "SWIID_Gini")


## read in WID income shares

## Load the income-share file chosen interactively.
TOP10_share <- read.csv(file.choose(), header = TRUE,
                        stringsAsFactors = FALSE, na.strings = "NA",
                        skipNul = TRUE)

## Positional renames for the country, year and share columns.
names(TOP10_share)[c(1, 4, 5)] <- c("CountryName", "year", "TOP10_SHARE")

TOP10_share$year <- as.numeric(TOP10_share$year)
TOP10_share$TOP10_SHARE <- as.numeric(TOP10_share$TOP10_SHARE)

## Diagnostic: country names that differ from V-Dem in 2019.
summary(comparedf(subset(TOP10_share, year == 2019),
                  subset(V_Dem_data, year == 2019, CountryName),
                  by = "CountryName"))

## Align country names with the V-Dem spelling via a lookup table.
TOP10_share$CountryName <- local({
  wid_name  <- c("Cabo Verde", "Congo", "DR Congo", "Gambia", "Lao PDR",
                 "Myanmar", "Swaziland", "Viet Nam")
  vdem_name <- c("Cape Verde", "Republic of the Congo",
                 "Democratic Republic of the Congo", "The Gambia", "Laos",
                 "Burma/Myanmar", "Eswatini", "Vietnam")
  country <- TOP10_share$CountryName
  hit <- match(country, wid_name)
  country[!is.na(hit)] <- vdem_name[hit[!is.na(hit)]]
  country
})

## The non-ASCII spelling is compared directly.
TOP10_share$CountryName[TOP10_share$CountryName == "Cote d’Ivoire"] <- "Ivory Coast"


## read in FDI stock data global (from UNCTAD)

## Load the wide FDI stock table (first four lines of the file are skipped).
FDI_stock_global <- read.csv(file.choose(), skip = 4, header = TRUE,
                             stringsAsFactors = FALSE, na.strings = "NA",
                             skipNul = TRUE)
names(FDI_stock_global)[1] <- "CountryName"

## Wide -> long: one row per country-year, value column named FDI; all
## character columns are whitespace-trimmed.
FDI_stock_global_long <- FDI_stock_global %>%
  gather(year, FDI, -CountryName) %>%
  mutate_if(is.character, str_trim)

## read.csv() prefixes numeric headers with "X"; remove it before converting.
FDI_stock_global_long$year <-
  as.numeric(gsub("X", "", FDI_stock_global_long$year))

## Non-numeric entries become NA here (with a coercion warning).
FDI_stock_global_long$FDI <- as.numeric(FDI_stock_global_long$FDI)

## read in bilateral FDI data from UNCTAD (to compare effects of US and Chinese investment)

## --- China as reporting country -------------------------------------------
China_FDI_outstock_global <- read.csv(file.choose(), skip = 4, header = TRUE,
                                      stringsAsFactors = FALSE,
                                      na.strings = "NA", skipNul = TRUE)
China_FDI_outstock_global <- China_FDI_outstock_global[
  which(China_FDI_outstock_global$Reporting.Country == "China"), ,
  drop = FALSE
]
names(China_FDI_outstock_global)[2] <- "CountryName"

## Wide -> long; the four identifier columns stay fixed.
China_FDI_outstock_global_long <- gather(
  China_FDI_outstock_global, year, Chinese_FDI_stock,
  -c(Reporting.Country, CountryName, FDI, Direction)
)
China_FDI_outstock_global_long$year <-
  gsub("X", "", China_FDI_outstock_global_long$year)
China_FDI_outstock_global_long <-
  mutate_if(China_FDI_outstock_global_long, is.character, str_trim)


## --- United States as reporting country ------------------------------------
US_FDI_outstock_global <- read.csv(file.choose(), skip = 4, header = TRUE,
                                   stringsAsFactors = FALSE,
                                   na.strings = "NA", skipNul = TRUE)
US_FDI_outstock_global <- US_FDI_outstock_global[
  which(US_FDI_outstock_global$Reporting.Country == "United States"), ,
  drop = FALSE
]
names(US_FDI_outstock_global)[2] <- "CountryName"

US_FDI_outstock_global_long <- gather(
  US_FDI_outstock_global, year, US_FDI_stock,
  -c(Reporting.Country, CountryName, FDI, Direction)
)
US_FDI_outstock_global_long$year <-
  gsub("X", "", US_FDI_outstock_global_long$year)
US_FDI_outstock_global_long <-
  mutate_if(US_FDI_outstock_global_long, is.character, str_trim)


## --- Combine: Chinese rows with the first matching US row attached ---------
UNCTAD_FDI_data_US_CH <- join(China_FDI_outstock_global_long,
                              US_FDI_outstock_global_long,
                              by = c("CountryName", "year"),
                              match = "first")

## Column 6 is the Chinese stock, column 10 the US stock appended by the join.
names(UNCTAD_FDI_data_US_CH)[c(6, 10)] <- c("PRC_FDI", "US_FDI")


## read in control variables


## Three indicator files with the same wide layout (four identifier columns
## followed by one column per year). Each is reshaped to long form with the
## value column carrying its final name; the "X" prefix that read.csv() puts
## on year headers is stripped. year stays a character column.

## Trade
trade_GDP <- read.csv(file.choose(), skip = 4, header = TRUE,
                      stringsAsFactors = FALSE, na.strings = "NA",
                      skipNul = TRUE)
names(trade_GDP)[1] <- "CountryName"

trade_GDP_long <- gather(trade_GDP, year, TRADE,
                         -c(CountryName, Country.Code,
                            Indicator.Name, Indicator.Code))
trade_GDP_long$year <- gsub("X", "", trade_GDP_long$year)


## Telephone lines
tel_poles <- read.csv(file.choose(), skip = 4, header = TRUE,
                      stringsAsFactors = FALSE, na.strings = "NA",
                      skipNul = TRUE)
names(tel_poles)[1] <- "CountryName"

tel_poles_long <- gather(tel_poles, year, TELLINE,
                         -c(CountryName, Country.Code,
                            Indicator.Name, Indicator.Code))
tel_poles_long$year <- gsub("X", "", tel_poles_long$year)


## Services value added
Services <- read.csv(file.choose(), skip = 4, header = TRUE,
                     stringsAsFactors = FALSE, na.strings = "NA",
                     skipNul = TRUE)
names(Services)[1] <- "CountryName"

Services_long <- gather(Services, year, SECTOR,
                        -c(CountryName, Country.Code,
                           Indicator.Name, Indicator.Code))
Services_long$year <- gsub("X", "", Services_long$year)


## Financial development index: wide file with CountryName and
## Indicator.Name as identifier columns.
finan_devel <- read.csv(file.choose(), header = TRUE,
                        stringsAsFactors = FALSE, na.strings = "NA",
                        skipNul = TRUE)

finan_devel_long <- gather(finan_devel, year, FINDEV,
                           -c(CountryName, Indicator.Name))
finan_devel_long$year <- gsub("X", "", finan_devel_long$year)


## Education: keep the block of columns from eys_1990 to eys_2019 plus the
## country column, which ends up in position 31.
education <- read.csv(file.choose(), header = TRUE,
                      stringsAsFactors = FALSE, na.strings = "NA",
                      skipNul = TRUE)
education <- subset(education, select = c(eys_1990:eys_2019, country))
names(education)[31] <- "CountryName"

education_long <- gather(education, year, EDUC, -CountryName)

## The year is what remains after removing the "eys_" prefix.
education_long$year <- gsub("eys_", "", education_long$year)


## GDP: wide indicator file reshaped to one row per country-year.
GDP_WB <- read.csv(file.choose(), skip = 4, header = TRUE,
                   stringsAsFactors = FALSE, na.strings = "NA",
                   skipNul = TRUE)
names(GDP_WB)[1] <- "CountryName"

GDP_WB_long <- gather(GDP_WB, year, GDP_WB_cur,
                      -c(CountryName, Country.Code,
                         Indicator.Name, Indicator.Code))
GDP_WB_long$year <- gsub("X", "", GDP_WB_long$year)


## Agriculture: same layout, value column AGRO_SECTOR.
agriculture <- read.csv(file.choose(), skip = 4, header = TRUE,
                        stringsAsFactors = FALSE, na.strings = "NA",
                        skipNul = TRUE)
names(agriculture)[1] <- "CountryName"

agriculture_long <- gather(agriculture, year, AGRO_SECTOR,
                           -c(CountryName, Country.Code,
                              Indicator.Name, Indicator.Code))
agriculture_long$year <- gsub("X", "", agriculture_long$year)

## FDI stock table used as FDI_cur (wide layout, four header lines skipped).
FDI_stock_current <- read.csv(file.choose(), skip = 4, header = TRUE,
                              stringsAsFactors = FALSE, na.strings = "NA",
                              skipNul = TRUE)
names(FDI_stock_current)[1] <- "CountryName"

FDI_stock_current_long <- gather(FDI_stock_current, year, FDI_cur,
                                 -CountryName)
FDI_stock_current_long$year <- gsub("X", "", FDI_stock_current_long$year)

## ".." and "_" are treated as missing-value markers.
FDI_stock_current_long$FDI_cur[
  FDI_stock_current_long$FDI_cur %in% c("..", "_")
] <- NA

## Trim whitespace in all character columns, then convert to numbers.
FDI_stock_current_long <-
  mutate_if(FDI_stock_current_long, is.character, str_trim)

FDI_stock_current_long$FDI_cur <- as.numeric(FDI_stock_current_long$FDI_cur)
FDI_stock_current_long$year <- as.numeric(FDI_stock_current_long$year)


## adjust country names to mirror V-Dem

## Diagnostic: bilateral-FDI country names that differ from V-Dem in 2019.
summary(comparedf(subset(UNCTAD_FDI_data_US_CH, year == 2019),
                  subset(V_Dem_data, year == 2019, CountryName),
                  by = "CountryName"))

## Recode partner-country names to the V-Dem spelling via a lookup table.
UNCTAD_FDI_data_US_CH$CountryName <- local({
  unctad_name <- c("Belgium / Luxembourg", "Cabo Verde", "Congo",
                   "Congo, Democratic Republic of", "Czechia", "Gambia",
                   "Myanmar", "Swaziland", "United States", "Viet Nam",
                   "Zaire")
  vdem_name   <- c("Luxembourg", "Cape Verde", "Republic of the Congo",
                   "Democratic Republic of the Congo", "Czech Republic",
                   "The Gambia", "Burma/Myanmar", "Eswatini",
                   "United States of America", "Vietnam",
                   "Democratic Republic of the Congo")
  country <- UNCTAD_FDI_data_US_CH$CountryName
  hit <- match(country, unctad_name)
  country[!is.na(hit)] <- vdem_name[hit[!is.na(hit)]]
  country
})

## Names containing non-ASCII characters are compared directly.
UNCTAD_FDI_data_US_CH$CountryName[UNCTAD_FDI_data_US_CH$CountryName == "Côte d'Ivoire"] <- "Ivory Coast"
UNCTAD_FDI_data_US_CH$CountryName[UNCTAD_FDI_data_US_CH$CountryName == "São Tomé and Principe"] <- "Sao Tome and Principe"




## Country-name harmonisation for the indicator files that share the same
## naming scheme (trade, telephone lines, services, GDP, agriculture).
##
## The same 18 renames used to be written out once per data frame, and
## agriculture_long was left out, so AGRO_SECTOR could not be matched for
## renamed countries (e.g. "Congo, Rep.", "Gambia, The", "Lao PDR") when the
## joint dataset is built. One lookup table is now applied to all five frames.

## Lookup: name as found in the indicator files -> V-Dem spelling.
wb_to_vdem <- c(
  "Cabo Verde"                = "Cape Verde",
  "Congo, Dem. Rep."          = "Democratic Republic of the Congo",
  "Congo, Rep."               = "Republic of the Congo",
  "Cote d'Ivoire"             = "Ivory Coast",
  "Egypt, Arab Rep."          = "Egypt",
  "Gambia, The"               = "The Gambia",
  "Iran, Islamic Rep."        = "Iran",
  "Korea, Dem. People's Rep." = "North Korea",
  "Korea, Rep."               = "South Korea",
  "Kyrgyz Republic"           = "Kyrgyzstan",
  "Lao PDR"                   = "Laos",
  "Myanmar"                   = "Burma/Myanmar",
  "Russian Federation"        = "Russia",
  "Slovak Republic"           = "Slovakia",
  "Syrian Arab Republic"      = "Syria",
  "United States"             = "United States of America",
  "Venezuela, RB"             = "Venezuela",
  "Yemen, Rep."               = "Yemen"
)

## Replace every name found in wb_to_vdem by its V-Dem spelling.
##   country: character vector of country names
##   returns: vector of the same length; names without an entry in the
##            lookup (including NA) are returned unchanged
recode_wb_country <- function(country) {
  hit <- match(country, names(wb_to_vdem))
  country[!is.na(hit)] <- unname(wb_to_vdem[hit[!is.na(hit)]])
  country
}

trade_GDP_long$CountryName   <- recode_wb_country(trade_GDP_long$CountryName)
tel_poles_long$CountryName   <- recode_wb_country(tel_poles_long$CountryName)
Services_long$CountryName    <- recode_wb_country(Services_long$CountryName)
GDP_WB_long$CountryName      <- recode_wb_country(GDP_WB_long$CountryName)
agriculture_long$CountryName <- recode_wb_country(agriculture_long$CountryName)



## Diagnostic: FDI-stock country names that differ from V-Dem in 2019.
summary(comparedf(subset(FDI_stock_global_long, year == 2019),
                  subset(V_Dem_data, year == 2019, CountryName),
                  by = "CountryName"))


## Recode the ASCII-only names to the V-Dem spelling via a lookup table.
FDI_stock_global_long$CountryName <- local({
  unctad_name <- c("Bolivia (Plurinational State of)", "Cabo Verde", "Congo",
                   "Congo, Dem. Rep. of the", "Czechia", "Ethiopia (...1991)",
                   "Gambia", "Germany, Federal Republic of",
                   "Iran (Islamic Republic of)",
                   "Korea, Dem. People's Rep. of", "Korea, Republic of",
                   "Lao People's Dem. Rep.", "Moldova, Republic of",
                   "Myanmar", "Russian Federation", "Sudan (...2011)",
                   "Switzerland, Liechtenstein", "Syrian Arab Republic",
                   "Tanzania, United Republic of",
                   "Venezuela (Bolivarian Rep. of)", "Viet Nam")
  vdem_name   <- c("Bolivia", "Cape Verde", "Republic of the Congo",
                   "Democratic Republic of the Congo", "Czech Republic",
                   "Ethiopia", "The Gambia", "Germany", "Iran",
                   "North Korea", "South Korea", "Laos", "Moldova",
                   "Burma/Myanmar", "Russia", "Sudan", "Switzerland",
                   "Syria", "Tanzania", "Venezuela", "Vietnam")
  country <- FDI_stock_global_long$CountryName
  hit <- match(country, unctad_name)
  country[!is.na(hit)] <- vdem_name[hit[!is.na(hit)]]
  country
})

## Declare the names as latin1 before comparing against the accented
## spelling below.
Encoding(FDI_stock_global_long$CountryName) <- "latin1"
FDI_stock_global_long$CountryName[FDI_stock_global_long$CountryName == "Côte d'Ivoire"] <- "Ivory Coast"


## Drop country-years without an FDI value.
FDI_stock_global_long <-
  FDI_stock_global_long[!is.na(FDI_stock_global_long$FDI), ]

## Diagnostic: SWIID country names that differ from V-Dem.
## NOTE(review): SWIID is filtered to 2009 while V-Dem is filtered to 2019 --
## confirm this asymmetry is intended.
summary(comparedf(subset(swiid_summary, year == 2009),
                  subset(V_Dem_data, year == 2019, CountryName),
                  by = "CountryName"))

## Align SWIID country names with the V-Dem spelling.
swiid_summary$CountryName[swiid_summary$CountryName == "Congo-Kinshasa"] <- "Democratic Republic of the Congo"
swiid_summary$CountryName[swiid_summary$CountryName == "Congo-Brazzaville"] <- "Republic of the Congo"
swiid_summary$CountryName[swiid_summary$CountryName == "Gambia"] <- "The Gambia"
swiid_summary$CountryName[swiid_summary$CountryName == "Côte d'Ivoire"] <- "Ivory Coast"
swiid_summary$CountryName[swiid_summary$CountryName == "São Tomé and Príncipe"] <- "Sao Tome and Principe"
swiid_summary$CountryName[swiid_summary$CountryName == "Korea"] <- "South Korea"
swiid_summary$CountryName[swiid_summary$CountryName == "United States"] <- "United States of America"
swiid_summary$CountryName[swiid_summary$CountryName == "Myanmar"] <- "Burma/Myanmar"

## "Equatorial Guinea" is intentionally left as it is. The former rename to
## "Equatorial-Guinea" moved the key away from the spelling that the
## financial-development recode in this script treats as the V-Dem name
## ("Equatorial Guinea"), so SWIID rows for that country could not be matched
## when the joint dataset is built.


## Diagnostic: education country names that differ from V-Dem in 2019.
summary(comparedf(subset(education_long, year == 2019),
                  subset(V_Dem_data, year == 2019, CountryName),
                  by = "CountryName"))


## Recode to the V-Dem spelling via a lookup table.
education_long$CountryName <- local({
  source_name <- c("Cabo Verde", "Congo",
                   "Congo (Democratic Republic of the)",
                   "Lao People's Democratic Republic",
                   "Eswatini (Kingdom of)", "Gambia", "Myanmar",
                   "Tanzania (United Republic of)", "Viet Nam")
  vdem_name   <- c("Cape Verde", "Republic of the Congo",
                   "Democratic Republic of the Congo", "Laos", "Eswatini",
                   "The Gambia", "Burma/Myanmar", "Tanzania", "Vietnam")
  country <- education_long$CountryName
  hit <- match(country, source_name)
  country[!is.na(hit)] <- vdem_name[hit[!is.na(hit)]]
  country
})

## The accented spelling is compared directly.
education_long$CountryName[education_long$CountryName == "Côte d'Ivoire"] <- "Ivory Coast"


## Diagnostic: financial-development country names that differ from V-Dem.
summary(comparedf(subset(finan_devel_long, year == 2019),
                  subset(V_Dem_data, year == 2019, CountryName),
                  by = "CountryName"))


## Recode the ASCII-only names to the V-Dem spelling via a lookup table.
finan_devel_long$CountryName <- local({
  source_name <- c("Cabo Verde", "Central African Rep.",
                   "Comoros, Union of the", "Congo, Dem. Rep. of the",
                   "Congo, Rep. of", "Equatorial Guinea, Rep. of",
                   "Eritrea, The State of", "Eswatini, Kingdom of",
                   "Ethiopia, The Federal Dem. Rep. of", "Gambia, The",
                   "Lao People's Dem. Rep.", "Lesotho, Kingdom of",
                   "Madagascar, Rep. of", "Mauritania, Islamic Rep. of",
                   "Mozambique, Rep. of", "Myanmar", "South Sudan, Rep. of",
                   "Tanzania, United Rep. of")
  vdem_name   <- c("Cape Verde", "Central African Republic", "Comoros",
                   "Democratic Republic of the Congo",
                   "Republic of the Congo", "Equatorial Guinea", "Eritrea",
                   "Eswatini", "Ethiopia", "The Gambia", "Laos", "Lesotho",
                   "Madagascar", "Mauritania", "Mozambique", "Burma/Myanmar",
                   "South Sudan", "Tanzania")
  country <- finan_devel_long$CountryName
  hit <- match(country, source_name)
  country[!is.na(hit)] <- vdem_name[hit[!is.na(hit)]]
  country
})

## Names containing non-ASCII characters are compared directly.
finan_devel_long$CountryName[finan_devel_long$CountryName == "Côte d'Ivoire"] <- "Ivory Coast"
finan_devel_long$CountryName[finan_devel_long$CountryName == "São Tomé and Príncipe, Dem. Rep. of"] <- "Sao Tome and Principe"


## Recode the ASCII-only names in the current FDI stock table to the V-Dem
## spelling via a lookup table.
FDI_stock_current_long$CountryName <- local({
  unctad_name <- c("Bolivia (Plurinational State of)", "Cabo Verde", "Congo",
                   "Congo, Dem. Rep. of the", "Czechia", "Ethiopia (...1991)",
                   "Gambia", "Germany, Federal Republic of",
                   "Iran (Islamic Republic of)",
                   "Korea, Dem. People's Rep. of", "Korea, Republic of",
                   "Lao People's Dem. Rep.", "Moldova, Republic of",
                   "Myanmar", "Russian Federation", "Sudan (...2011)",
                   "Switzerland, Liechtenstein", "Syrian Arab Republic",
                   "Tanzania, United Republic of",
                   "Venezuela (Bolivarian Rep. of)", "Viet Nam")
  vdem_name   <- c("Bolivia", "Cape Verde", "Republic of the Congo",
                   "Democratic Republic of the Congo", "Czech Republic",
                   "Ethiopia", "The Gambia", "Germany", "Iran",
                   "North Korea", "South Korea", "Laos", "Moldova",
                   "Burma/Myanmar", "Russia", "Sudan", "Switzerland",
                   "Syria", "Tanzania", "Venezuela", "Vietnam")
  country <- FDI_stock_current_long$CountryName
  hit <- match(country, unctad_name)
  country[!is.na(hit)] <- vdem_name[hit[!is.na(hit)]]
  country
})

## Declare the names as latin1 before comparing against the accented
## spelling below.
Encoding(FDI_stock_current_long$CountryName) <- "latin1"
FDI_stock_current_long$CountryName[FDI_stock_current_long$CountryName == "Côte d'Ivoire"] <- "Ivory Coast"

## Drop country-years without a value.
FDI_stock_current_long <-
  FDI_stock_current_long[!is.na(FDI_stock_current_long$FDI_cur), ]

## building a joint dataset

## Build the joint country-year dataset. Starting from the V-Dem rows of
## regions 4 and 7, every further source is attached in turn with a left
## join on (CountryName, year) that keeps only the first matching row.
## Reduce() folds the list from left to right, i.e. in the order shown.
database_full <- Reduce(
  function(merged, extra) {
    join(merged, extra, by = c("CountryName", "year"), match = "first")
  },
  list(
    subset(V_Dem_data, subset = e_regionpol %in% c(4, 7)),
    FDI_stock_global_long,
    swiid_summary[, c("CountryName", "year", "SWIID_Gini", "gini_disp")],
    TOP10_share[, c("CountryName", "year", "TOP10_SHARE")],
    trade_GDP_long[, c("CountryName", "year", "TRADE")],
    education_long,
    Services_long[, c("CountryName", "year", "SECTOR")],
    finan_devel_long[, c("CountryName", "year", "FINDEV")],
    tel_poles_long[, c("CountryName", "year", "TELLINE")],
    UNCTAD_FDI_data_US_CH[, c("CountryName", "year", "US_FDI", "PRC_FDI")],
    GDP_WB_long[, c("CountryName", "year", "GDP_WB_cur")],
    agriculture_long[, c("CountryName", "year", "AGRO_SECTOR")],
    FDI_stock_current_long[, c("CountryName", "year", "FDI_cur")]
  )
)

## FINDEV is converted to numeric after the join.
database_full$FINDEV <- as.numeric(database_full$FINDEV)

## Rescale the top-10 share by a factor of 100.
database_full$TOP10_SHARE <- database_full$TOP10_SHARE * 100


## creating other variables

## Democracy dummy: 1 when v2x_regime equals 2 or 3, 0 otherwise
## (missing regime values stay NA).
database_full$DEMOCRACY <- with(
  database_full,
  ifelse(v2x_regime == 2 | v2x_regime == 3, 1, 0)
)

## Chinese FDI relative to the total FDI stock and relative to GDP

## Share of the Chinese stock in FDI_cur, in percent (computed before PRC_FDI
## is overwritten below).
database_full$PRC_SHARE <- with(database_full, (PRC_FDI / FDI_cur) * 100)

## PRC_FDI is scaled by 1,000,000 and expressed in percent of GDP_WB_cur.
database_full$PRC_FDI <- with(
  database_full,
  ((PRC_FDI * 1000000) / GDP_WB_cur) * 100
)

## Remaining FDI: total FDI minus the Chinese part.
database_full$R.FDI <- with(database_full, FDI - PRC_FDI)


## creating squared variables

## Squared terms of the main regressors and controls.
database_full$FDI <- as.numeric(database_full$FDI)
database_full$FDI2 <- database_full$FDI^2

database_full$PRC_FDI2 <- database_full$PRC_FDI^2

database_full$R.FDI2 <- database_full$R.FDI^2


database_full$GDPpc2 <- database_full$GDPpc^2

database_full$TRADE2 <- database_full$TRADE^2

database_full$EDUC2 <- database_full$EDUC^2

## Interactions of lagged FDI with NEOPAT.
##
## The lag is taken within each country. Lagging the pooled column shifts
## values across country boundaries, so the first row of every country would
## receive the last FDI value of the country stored above it. With the
## per-country lag, the first row of each country is NA instead.
## Assumes rows are ordered by year within each country and that consecutive
## rows are consecutive years -- TODO confirm.
##   x: column of database_full to be lagged
##   returns: x shifted down by one row inside every CountryName group
lag_by_country <- function(x) {
  ave(x, database_full$CountryName, FUN = function(v) dplyr::lag(v, 1))
}

database_full$FDIxNEOPAT <-
  lag_by_country(database_full$FDI) * database_full$NEOPAT
database_full$FDI2xNEOPAT <-
  lag_by_country(database_full$FDI2) * database_full$NEOPAT


## filtering SEA and SSA dataset

## Regional subsets of the joint dataset, selected on e_regionpol
## (7 -> database_SEA, 4 -> database_Africa). Rows with a missing region code
## are excluded from both.
database_SEA <- database_full[which(database_full$e_regionpol == 7), ]

database_Africa <- database_full[which(database_full$e_regionpol == 4), ]


## creating maps and graphs for descriptive statistics

## Medium-scale country polygons as an sf object, without Antarctica.
world_map <- ne_countries(scale = "medium", returnclass = "sf")
world_map <- subset(world_map, sovereignt != "Antarctica")

## Column 4 becomes the country key used for merging.
colnames(world_map)[4] <- "CountryName"

## Diagnostic: map names that differ from the dataset names.
summary(comparedf(database_full, world_map, by = "CountryName"))

## Recode map names to the spelling used in database_full.
world_map$CountryName <- local({
  map_name  <- c("Myanmar", "Swaziland", "Republic of Congo",
                 "Republic of Serbia", "United Republic of Tanzania",
                 "Guinea Bissau", "Gambia")
  data_name <- c("Burma/Myanmar", "Eswatini", "Republic of the Congo",
                 "Serbia", "Tanzania", "Guinea-Bissau", "The Gambia")
  country <- world_map$CountryName
  hit <- match(country, map_name)
  country[!is.na(hit)] <- data_name[hit[!is.na(hit)]]
  country
})


## Attach the country-year data to the polygons; polygons without data are
## kept (all.x = TRUE).
mapping_data <- merge(world_map, database_full,
                      by = "CountryName", all.x = TRUE)


## coverage map

## variables carried into the combined SEA + Africa coverage dataset
coverage_columns <- c("CountryName", "year", "SWIID_Gini", "FDI", "GDPpc",
                      "TRADE", "EDUC", "SECTOR", "DEMOCRACY", "TELLINE",
                      "NEOPAT")

database_SEA_Africa <- rbind(database_SEA[, coverage_columns],
                             database_Africa[, coverage_columns])

## notNA = 1 when every variable listed below is observed for the
## country-year, 0 otherwise
## NOTE(review): DEMOCRACY is selected above but not part of this check --
## confirm that is intended
coverage_required <- c("SWIID_Gini", "FDI", "GDPpc", "TRADE", "TELLINE",
                       "SECTOR", "EDUC", "NEOPAT")

database_SEA_Africa$notNA <- as.numeric(
  complete.cases(database_SEA_Africa[, coverage_required])
)

## number of complete country-years per country; the formula goes first and
## the data frame is passed via `data =`, which is the documented formula
## interface of aggregate() (a formula in the `by` position of the data-frame
## method is not portable)
database_SEA_Africa_fmap <- aggregate(notNA ~ CountryName,
                                      data = database_SEA_Africa,
                                      FUN = sum)


## attach the per-country counts to the map data
database_SEA_Africa_fmap <- merge(mapping_data, database_SEA_Africa_fmap,
                                  by = "CountryName")


## choropleth of the number of complete country-years, drawn from the 2020
## rows of the two study regions (e_regionpol 4 and 7)
coverage_map <- ggplot(
  subset(database_SEA_Africa_fmap,
         year == 2020 & e_regionpol %in% c(4, 7))
) +
  geom_sf(aes(fill = notNA)) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  ggtitle("Data availability by country") +
  guides(fill = guide_legend(title = "Number of country-years available")) +
  theme(legend.position = "bottom")

## NEOPAT map

## choropleth of the 2020 NEOPAT value for the two study regions
## (e_regionpol 4 and 7)
neopat_map <- ggplot(
  subset(mapping_data,
         year == 2020 & e_regionpol %in% c(4, 7))
) +
  geom_sf(aes(fill = NEOPAT)) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  ggtitle("Neopatrimonial Rule Index by Country") +
  guides(fill = guide_legend(title = "Neopatrimonial Rule Index")) +
  theme(legend.position = "bottom")


## regression analysis with two-way fixed effects

## for reproduction load following datasets here
##do not run

##database_full <- read.csv(file.choose(), header = TRUE, 
##                                stringsAsFactors = FALSE, na.strings = "NA",
##                                skipNul = TRUE)

##database_SEA <- read.csv(file.choose(), header = TRUE, 
##                          stringsAsFactors = FALSE, na.strings = "NA",
##                          skipNul = TRUE)

##database_Africa <- read.csv(file.choose(), header = TRUE, 
##                          stringsAsFactors = FALSE, na.strings = "NA",
##                          skipNul = TRUE)

## regression analysis for SEA sample

## Two-way (country and year) fixed-effects model of the SWIID Gini on NEOPAT,
## the control variables, FDI, squared FDI and the interactions of both FDI
## terms with NEOPAT, estimated on the SEA subsample.
## Note: FDI*NEOPAT already expands to FDI + NEOPAT + FDI:NEOPAT, so listing
## the main effects separately is redundant but harmless.
fixed_reg_SEA_global_SWIID <- plm(formula = SWIID_Gini ~ NEOPAT + GDPpc + GDPpc2 + 
                                  TRADE + TRADE2 + EDUC + 
                                  TELLINE + DEMOCRACY + 
                                  SECTOR + FINDEV +
                                  FDI + FDI*NEOPAT + 
                                  FDI2 + FDI2*NEOPAT, 
                                data = database_SEA, na.action = na.exclude, 
                                index = c("CountryName", "year"), model = "within",
                                effect = "twoway")

summary(fixed_reg_SEA_global_SWIID)

## SEA model diagnostics

## Lagrange multiplier test for individual and time effects
plmtest(fixed_reg_SEA_global_SWIID, effect = "twoways")

## Pesaran CD test for cross-sectional dependence
pcdtest(fixed_reg_SEA_global_SWIID, test = "cd")

## Wooldridge test for serial correlation in fixed-effects panels
pwartest(fixed_reg_SEA_global_SWIID)

## Durbin-Watson-type panel statistic for serial correlation
pbnftest(fixed_reg_SEA_global_SWIID)

## normal Q-Q plot of the model residuals
qqnorm(residuals(fixed_reg_SEA_global_SWIID), ylab = 'Residuals')
qqline(residuals(fixed_reg_SEA_global_SWIID))

## address serially correlated errors and heteroskedasticity via clustered SE

## coefficient tests using the Arellano robust covariance estimator (HC1)
coeftest(fixed_reg_SEA_global_SWIID, 
         vcov = vcovHC(fixed_reg_SEA_global_SWIID, 
                       method = "arellano", 
                       type = "HC1"))

## repeating the model using WID

## Same two-way fixed-effects specification as the SWIID model on the SEA
## subsample, with TOP10_SHARE as the dependent variable.
fixed_reg_SEA_global_WID <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                      TRADE + TRADE2 + EDUC + 
                                      TELLINE + DEMOCRACY + 
                                      SECTOR + FINDEV + 
                                      FDI + FDI*NEOPAT +
                                      FDI2 + FDI2*NEOPAT, 
                                    data = database_SEA, na.action = na.exclude, 
                                    index = c("CountryName", "year"), model = "within",
                                    effect = "twoway")

summary(fixed_reg_SEA_global_WID)


## Adjusted WID specification for SEA: adds squared education, replaces FDI
## and FDI2 by their one-period lags, and drops Singapore from the sample.
## stats::lag() is called explicitly: dplyr is attached after plm and its
## lag() masks the generic, shifting rows without regard to the panel index.
## stats::lag() dispatches to plm's panel-aware method, so a lag never crosses
## from one country into the next.
fixed_reg_SEA_global_WID_adj <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                  TRADE + TRADE2 + EDUC + EDUC2 +
                                  TELLINE + DEMOCRACY + 
                                  SECTOR + FINDEV +
                                  stats::lag(FDI,1) + stats::lag(FDI,1)*NEOPAT +
                                  stats::lag(FDI2,1) + stats::lag(FDI2,1)*NEOPAT, 
                                data = subset(database_SEA, CountryName != "Singapore"), na.action = na.exclude, 
                                index = c("CountryName", "year"), model = "within",
                                effect = "twoway")

summary(fixed_reg_SEA_global_WID_adj)


## SEA model diagnostics

## Durbin-Watson-type panel statistic for serial correlation
pbnftest(fixed_reg_SEA_global_WID_adj)

## Wooldridge test for serial correlation in fixed-effects panels
pwartest(fixed_reg_SEA_global_WID_adj)

## Pesaran CD test for cross-sectional dependence
pcdtest(fixed_reg_SEA_global_WID_adj, test = "cd")

## normal Q-Q plot of the model residuals
qqnorm(residuals(fixed_reg_SEA_global_WID_adj), ylab = 'Residuals')
qqline(residuals(fixed_reg_SEA_global_WID_adj))

## address serially correlated errors and heteroskedasticity via clustered SE

coeftest(fixed_reg_SEA_global_WID_adj, 
         vcov = vcovHC(fixed_reg_SEA_global_WID_adj, 
                       method = "arellano", 
                       type = "HC1"))

## two-way fixed effects for the African sample


## Same specification as the SEA SWIID model, estimated on database_Africa:
## SWIID Gini on NEOPAT, controls, FDI, squared FDI and their interactions
## with NEOPAT, with country and year fixed effects.
fixed_reg_Africa_global_SWIID <- plm(formula = SWIID_Gini ~ NEOPAT + GDPpc + GDPpc2 + 
                                       TRADE + TRADE2 + EDUC + 
                                       TELLINE + DEMOCRACY + 
                                       SECTOR + FINDEV +
                                       FDI + FDI*NEOPAT +
                                       FDI2 + FDI2*NEOPAT, 
                                   data = database_Africa, na.action = na.exclude, 
                                   index = c("CountryName", "year"), model = "within",
                                   effect = "twoway")

summary(fixed_reg_Africa_global_SWIID)

## serial-correlation diagnostics
pbnftest(fixed_reg_Africa_global_SWIID)

pwartest(fixed_reg_Africa_global_SWIID)

## Pesaran CD test for cross-sectional dependence
pcdtest(fixed_reg_Africa_global_SWIID, test = "cd")

## normal Q-Q plot of the model residuals
qqnorm(residuals(fixed_reg_Africa_global_SWIID), ylab = 'Residuals')
qqline(residuals(fixed_reg_Africa_global_SWIID))

## address serially correlated errors and heteroskedasticity via clustered SE

coeftest(fixed_reg_Africa_global_SWIID, 
         vcov = vcovHC(fixed_reg_Africa_global_SWIID, 
                       method = "arellano", 
                       type = "HC1"))

## repeating estimation with WID

## African sample with TOP10_SHARE as the dependent variable; regressors are
## unchanged from the SWIID model above.
fixed_reg_Africa_global_WID <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                     TRADE + TRADE2 + EDUC +
                                     TELLINE + DEMOCRACY + 
                                     SECTOR + FINDEV +
                                     FDI + FDI*NEOPAT +
                                     FDI2 + FDI2*NEOPAT, 
                                   data = database_Africa, na.action = na.exclude, 
                                   index = c("CountryName", "year"), model = "within",
                                   effect = "twoway")

summary(fixed_reg_Africa_global_WID)

## addressing outliers

## FDI by year with country labels, to spot extreme country-years
ggplot(data = database_Africa, aes(x = year, y = FDI, label = CountryName)) +
  geom_point(aes()) + geom_text()


## five-number summaries of FDI for every year
tapply(database_Africa$FDI, database_Africa$year, summary)


## Keep only country-years whose FDI lies strictly below a year-specific
## quantile of the African sample: the 0.8 quantile for 1990 and the 0.9
## quantile for each year from 1991 to 2020. Rows from any other year and rows
## with missing FDI are dropped.
outlier_years <- 1990:2020
outlier_probs <- ifelse(outlier_years == 1990, 0.8, 0.9)

## one cutoff per year, computed from that year's non-missing FDI values
fdi_cutoffs <- vapply(seq_along(outlier_years), function(i) {
  in_year <- which(database_Africa$year == outlier_years[i])
  quantile(database_Africa$FDI[in_year], outlier_probs[i],
           na.rm = TRUE, names = FALSE)
}, numeric(1))

## cutoff that applies to each row (NA for years outside 1990-2020)
fdi_cutoff_by_row <- fdi_cutoffs[match(database_Africa$year, outlier_years)]

Africa_ex_out <- subset(database_Africa, FDI < fdi_cutoff_by_row)

## FDI time series for Liberia
ggplot(subset(database_Africa, CountryName == "Liberia"), aes(x=year, y=FDI)) +
  geom_line()


## Adjusted WID specification for Africa, estimated on the outlier-trimmed
## sample: adds squared education and uses one-period lags of FDI and FDI2.
## stats::lag() is called explicitly: dplyr is attached after plm and its
## lag() masks the generic, shifting rows without regard to the panel index.
## stats::lag() dispatches to plm's panel-aware method, which also respects
## the gaps left by the removed outlier country-years.
fixed_reg_Africa_global_WID_adj <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                     TRADE + TRADE2 + EDUC + EDUC2 +
                                     TELLINE + DEMOCRACY + 
                                     SECTOR + FINDEV +
                                     stats::lag(FDI,1) + stats::lag(FDI,1)*NEOPAT +
                                     stats::lag(FDI2,1) + stats::lag(FDI2,1)*NEOPAT, 
                                   data = Africa_ex_out, na.action = na.exclude, 
                                   index = c("CountryName", "year"), model = "within",
                                   effect = "twoway")

summary(fixed_reg_Africa_global_WID_adj)


## serial-correlation diagnostics
pbnftest(fixed_reg_Africa_global_WID_adj)

pwartest(fixed_reg_Africa_global_WID_adj)

## Pesaran CD test for cross-sectional dependence
pcdtest(fixed_reg_Africa_global_WID_adj, test = "cd")

## normal Q-Q plot; the reference line must come from the same (adjusted)
## model as the points
qqnorm(residuals(fixed_reg_Africa_global_WID_adj), ylab = 'Residuals')
qqline(residuals(fixed_reg_Africa_global_WID_adj))


## address serially correlated errors and heteroskedasticity via clustered SE

coeftest(fixed_reg_Africa_global_WID_adj, 
         vcov = vcovHC(fixed_reg_Africa_global_WID_adj, 
                       method = "arellano", 
                       type = "HC1"))


## testing correlation between SWIID and WID

## Two-way fixed-effects regression of TOP10_SHARE on SWIID_Gini over the full
## panel; despite the name this is a regression, not a correlation coefficient.
cor_SWIID_WID <- plm(TOP10_SHARE ~ SWIID_Gini,
                     data = database_full, na.action = na.exclude, 
                     index = c("CountryName", "year"), model = "within",
                     effect = "twoway")

summary(cor_SWIID_WID)


## additional interaction terms

## the 10th column of Africa_ex_out becomes GOV_CAP
## NOTE(review): positional rename -- presumably column 10 is e_wbgi_gee;
## verify the column order of Africa_ex_out before relying on this
colnames(Africa_ex_out)[10] <- "GOV_CAP"

cor(Africa_ex_out$NEOPAT, Africa_ex_out$GOV_CAP, use = "complete.obs")

## two-way fixed-effects regression of NEOPAT on GOV_CAP
var_tester <- plm(NEOPAT ~ GOV_CAP, 
                  data = Africa_ex_out, na.action = na.exclude, 
                  index = c("CountryName", "year"), model = "within",
                  effect = "twoway")

summary(var_tester)



## Extended SWIID model: adds GOV_CAP and interactions of lagged squared FDI
## with EDUC, TELLINE, FINDEV, SECTOR and GOV_CAP.
## NOTE(review): the object name says SEA but the model is fitted on
## Africa_ex_out -- confirm which sample is intended.
## stats::lag() is called explicitly: dplyr is attached after plm and its
## lag() masks the generic, shifting rows without regard to the panel index.
## stats::lag() dispatches to plm's panel-aware method.
fixed_reg_SEA_global_SWIID_add <- plm(formula = SWIID_Gini ~ NEOPAT + GDPpc + GDPpc2 + 
                                      TRADE + TRADE2 + EDUC + EDUC2 +
                                      TELLINE + DEMOCRACY + 
                                      SECTOR + FINDEV + GOV_CAP +
                                      stats::lag(FDI,1) + stats::lag(FDI,1)*NEOPAT +
                                      stats::lag(FDI2,1) + stats::lag(FDI2,1)*NEOPAT +
                                      stats::lag(FDI2,1)*EDUC + stats::lag(FDI2,1)*TELLINE +
                                      stats::lag(FDI2,1)*FINDEV + stats::lag(FDI2,1)*SECTOR +
                                      stats::lag(FDI2,1)*GOV_CAP, 
                                    data = Africa_ex_out, na.action = na.exclude, 
                                    index = c("CountryName", "year"), model = "within",
                                    effect = "twoway")

summary(fixed_reg_SEA_global_SWIID_add)

coeftest(fixed_reg_SEA_global_SWIID_add, 
         vcov = vcovHC(fixed_reg_SEA_global_SWIID_add, 
                       method = "arellano", 
                       type = "HC1"))

## Same extended specification with TOP10_SHARE as the dependent variable
## (also fitted on Africa_ex_out).
fixed_reg_SEA_global_WID_add <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                         TRADE + TRADE2 + EDUC + EDUC2 +
                                         TELLINE + DEMOCRACY + 
                                         SECTOR + FINDEV + GOV_CAP +
                                         stats::lag(FDI,1) + stats::lag(FDI,1)*NEOPAT +
                                         stats::lag(FDI2,1) + stats::lag(FDI2,1)*NEOPAT +
                                         stats::lag(FDI2,1)*EDUC + stats::lag(FDI2,1)*TELLINE +
                                         stats::lag(FDI2,1)*FINDEV + stats::lag(FDI2,1)*SECTOR +
                                         stats::lag(FDI2,1)*GOV_CAP, 
                                       data = Africa_ex_out, na.action = na.exclude, 
                                       index = c("CountryName", "year"), model = "within",
                                       effect = "twoway")

summary(fixed_reg_SEA_global_WID_add)

coeftest(fixed_reg_SEA_global_WID_add, 
         vcov = vcovHC(fixed_reg_SEA_global_WID_add, 
                       method = "arellano", 
                       type = "HC1"))

## robustness test high and low neopat

summary(database_full$NEOPAT)

## Split the trimmed African sample into the country-years above the upper
## quartile and below the lower quartile of NEOPAT. na.rm = TRUE keeps the
## quantile computation from aborting when NEOPAT has missing values; rows
## with missing NEOPAT are dropped by subset() either way.
high_neopat <- subset(Africa_ex_out,
                      NEOPAT > quantile(NEOPAT, 0.75, na.rm = TRUE))
low_neopat <- subset(Africa_ex_out,
                     NEOPAT < quantile(NEOPAT, 0.25, na.rm = TRUE))


## Two-way fixed-effects model of TOP10_SHARE on the high-NEOPAT subsample,
## without DEMOCRACY and without NEOPAT interactions.
## stats::lag() is called explicitly: dplyr is attached after plm and its
## lag() masks the generic, shifting rows without regard to the panel index.
## stats::lag() dispatches to plm's panel-aware method.
WID_high_neopat_fixed <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                 TRADE + TRADE2 + EDUC + EDUC2 +
                                 TELLINE +  
                                 SECTOR + FINDEV +
                                 stats::lag(FDI,1) + stats::lag(FDI2,1), 
                               data = high_neopat, na.action = na.exclude, 
                               index = c("CountryName", "year"), model = "within",
                               effect = "twoway")

summary(WID_high_neopat_fixed)

coeftest(WID_high_neopat_fixed, 
         vcov = vcovHC(WID_high_neopat_fixed, 
                       method = "arellano", 
                       type = "HC1"))

## Same specification on the low-NEOPAT subsample.
WID_low_neopat_fixed <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                 TRADE + TRADE2 + EDUC + EDUC2 +
                                 TELLINE +  
                                 SECTOR + FINDEV +
                                 stats::lag(FDI,1) + stats::lag(FDI2,1), 
                               data = low_neopat, na.action = na.exclude, 
                               index = c("CountryName", "year"), model = "within",
                               effect = "twoway")

summary(WID_low_neopat_fixed)

coeftest(WID_low_neopat_fixed, 
         vcov = vcovHC(WID_low_neopat_fixed, 
                       method = "arellano", 
                       type = "HC1"))


## distinguishing between sources of FDI


## Two-way fixed-effects model of the SWIID Gini in which total FDI is split
## into PRC_FDI and R.FDI, each entering in levels, squared, and interacted
## with NEOPAT. Estimated on the full panel for years after 2002, excluding
## Singapore.
fixed_reg_by_source_SWIID <- plm(formula = SWIID_Gini ~ NEOPAT + GDPpc + GDPpc2 + 
                                   TRADE + TRADE2 + EDUC + EDUC2 +
                                   TELLINE + DEMOCRACY + 
                                   SECTOR + FINDEV + 
                                   PRC_FDI + PRC_FDI*NEOPAT +
                                   PRC_FDI2 + PRC_FDI2*NEOPAT +
                                   R.FDI + R.FDI*NEOPAT +
                                   R.FDI2 + R.FDI2*NEOPAT, 
                               data = subset(database_full, subset = year > 2002 &
                                               CountryName != "Singapore"), 
                               na.action = na.exclude, 
                               index = c("CountryName", "year"), model = "within",
                               effect = "twoway")

summary(fixed_reg_by_source_SWIID)

## serial-correlation diagnostics
pwartest(fixed_reg_by_source_SWIID)

pbnftest(fixed_reg_by_source_SWIID)

## Pesaran CD test for cross-sectional dependence
pcdtest(fixed_reg_by_source_SWIID, test = "cd")

## normal Q-Q plot of the model residuals
qqnorm(residuals(fixed_reg_by_source_SWIID), ylab = 'Residuals')
qqline(residuals(fixed_reg_by_source_SWIID))

## address serially correlated errors and heteroskedasticity via clustered SE

coeftest(fixed_reg_by_source_SWIID, 
         vcov = vcovHC(fixed_reg_by_source_SWIID, 
                       method = "arellano", 
                       type = "HC1"))


## repeating estimation with WID 


## Same by-source specification and sample with TOP10_SHARE as the dependent
## variable.
fixed_reg_by_source_WID <- plm(formula = TOP10_SHARE ~ NEOPAT + GDPpc + GDPpc2 + 
                                     TRADE + TRADE2 + EDUC + EDUC2 +
                                     TELLINE + DEMOCRACY + 
                                     SECTOR + FINDEV + 
                                     PRC_FDI + PRC_FDI*NEOPAT +
                                     PRC_FDI2 + PRC_FDI2*NEOPAT +
                                     R.FDI + R.FDI*NEOPAT +
                                     R.FDI2 + R.FDI2*NEOPAT, 
                                   data = subset(database_full, subset = year > 2002 &
                                                   CountryName != "Singapore"), 
                                   na.action = na.exclude, 
                                   index = c("CountryName", "year"), model = "within",
                                   effect = "twoway")

summary(fixed_reg_by_source_WID)


## serial-correlation diagnostics
pwartest(fixed_reg_by_source_WID)

pbnftest(fixed_reg_by_source_WID)

## Pesaran CD test for cross-sectional dependence
pcdtest(fixed_reg_by_source_WID, test = "cd")


## normal Q-Q plot of the model residuals
qqnorm(residuals(fixed_reg_by_source_WID), ylab = 'Residuals')
qqline(residuals(fixed_reg_by_source_WID))

## address serially correlated errors and heteroskedasticity via clustered SE

coeftest(fixed_reg_by_source_WID, 
         vcov = vcovHC(fixed_reg_by_source_WID, 
                       method = "arellano", 
                       type = "HC1"))


## create regression tables for paper

## robust (Arellano, HC1) standard errors: square roots of the diagonal of the
## robust covariance matrix of each model
robust_se_SEA_SWIID <- sqrt(diag(
  vcovHC(fixed_reg_SEA_global_SWIID, method = "arellano", type = "HC1")
))

robust_se_Africa_SWIID <- sqrt(diag(
  vcovHC(fixed_reg_Africa_global_SWIID, method = "arellano", type = "HC1")
))


Mod1.1 <- fixed_reg_SEA_global_SWIID
Mod1.2 <- fixed_reg_Africa_global_SWIID

## every model appears twice: first with its default standard errors
## (se = NULL), then with the robust ones
stargazer(
  list(Mod1.1, Mod1.1, Mod1.2, Mod1.2),
  type = "html",
  se = list(NULL, robust_se_SEA_SWIID, NULL, robust_se_Africa_SWIID),
  title = "Two-way fixed effects with SWIID Gini as a Function of FDI and Neopatrimonialism",
  out = "C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Tables/tableSEASSA_fixed.html",
  column.labels = c("Southeast Asia", "SEA robust SE", "Africa", "SSA robust SE"),
  colnames = FALSE,
  dep.var.labels = "Gini-SWIID",
  model.numbers = TRUE,
  df = TRUE
)


## robust (Arellano, HC1) standard errors for the two WID baseline models
robust_se_SEA_WID <- sqrt(diag(
  vcovHC(fixed_reg_SEA_global_WID, method = "arellano", type = "HC1")
))

robust_se_Africa_WID <- sqrt(diag(
  vcovHC(fixed_reg_Africa_global_WID, method = "arellano", type = "HC1")
))


Mod2.1 <- fixed_reg_SEA_global_WID
Mod2.2 <- fixed_reg_Africa_global_WID

## each model is shown with default and then with robust standard errors
stargazer(
  list(Mod2.1, Mod2.1, Mod2.2, Mod2.2),
  type = "html",
  se = list(NULL, robust_se_SEA_WID, NULL, robust_se_Africa_WID),
  title = "Two-way fixed effects with Top 10% income share as a Function of FDI and Neopatrimonialism",
  out = "C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Tables/tableWID_fixed.html",
  column.labels = c("Southeast Asia", "SEA robust SE", "Africa", "SSA robust SE"),
  colnames = FALSE,
  dep.var.labels = "Income Share of the Top 10% of the income distribution",
  model.numbers = TRUE,
  df = TRUE
)

## table for the adjusted WID models (lagged FDI, squared education)
Mod3.1 <- fixed_reg_SEA_global_WID_adj
Mod3.2 <- fixed_reg_Africa_global_WID_adj

## robust (Arellano, HC1) standard errors of both adjusted models
robust_se_SEA_WID_adj <- sqrt(diag(
  vcovHC(fixed_reg_SEA_global_WID_adj, method = "arellano", type = "HC1")
))

robust_se_Africa_WID_adj <- sqrt(diag(
  vcovHC(fixed_reg_Africa_global_WID_adj, method = "arellano", type = "HC1")
))

## each model is shown with default and then with robust standard errors
stargazer(
  list(Mod3.1, Mod3.1, Mod3.2, Mod3.2),
  type = "html",
  se = list(NULL, robust_se_SEA_WID_adj, NULL, robust_se_Africa_WID_adj),
  title = "Two-way fixed effects with Top 10% Income Share with lags and squared education",
  out = "C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Tables/tableWID_adj.html",
  column.labels = c("Southeast Asia", "SEA robust SE", "Africa", "SSA robust SE"),
  colnames = FALSE,
  dep.var.labels = "Income Share of the Top 10% of the Income Distribution",
  model.numbers = TRUE,
  df = TRUE
)



## table for the FDI-by-source models (SWIID and WID outcomes)
Mod4.1 <- fixed_reg_by_source_SWIID
Mod4.2 <- fixed_reg_by_source_WID

## robust (Arellano, HC1) standard errors of both by-source models
robust_se_by_source_SWIID <- sqrt(diag(
  vcovHC(fixed_reg_by_source_SWIID, method = "arellano", type = "HC1")
))

robust_se_by_source_WID <- sqrt(diag(
  vcovHC(fixed_reg_by_source_WID, method = "arellano", type = "HC1")
))


## each model is shown with default and then with robust standard errors
stargazer(
  list(Mod4.1, Mod4.1, Mod4.2, Mod4.2),
  type = "html",
  se = list(NULL, robust_se_by_source_SWIID, NULL, robust_se_by_source_WID),
  title = "Two-way fixed effects for FDI by source pooled sample",
  out = "C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Tables/by_source_fixed.html",
  column.labels = c("SWIID", "robust SE", "WID", "robust SE"),
  colnames = FALSE,
  dep.var.labels = c("Gini-SWIID", "Top 10% Income Share"),
  model.numbers = FALSE,
  df = TRUE
)

## table for the high- versus low-NEOPAT subsample models
Mod5.1 <- WID_high_neopat_fixed
Mod5.2 <- WID_low_neopat_fixed

## robust (Arellano, HC1) standard errors of both subsample models
robust_se_high_neopat <- sqrt(diag(
  vcovHC(WID_high_neopat_fixed, method = "arellano", type = "HC1")
))

robust_se_low_neopat <- sqrt(diag(
  vcovHC(WID_low_neopat_fixed, method = "arellano", type = "HC1")
))


## each model is shown with default and then with robust standard errors
stargazer(
  list(Mod5.1, Mod5.1, Mod5.2, Mod5.2),
  type = "html",
  se = list(NULL, robust_se_high_neopat, NULL, robust_se_low_neopat),
  title = "Two-way fixed effects for top and bottom quartile of NEOPAT",
  out = "C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Tables/high_low_fixed.html",
  column.labels = c("High NEOPAT", "robust SE", "Low NEOPAT", "robust SE"),
  colnames = FALSE,
  dep.var.labels = "Income Share of the Top 10% of the Income Distribution",
  model.numbers = TRUE,
  df = TRUE
)

## table for the models with additional interaction terms
Mod6.1 <- fixed_reg_SEA_global_SWIID_add
Mod6.2 <- fixed_reg_SEA_global_WID_add

## robust (Arellano, HC1) standard errors of both extended models
robust_se_6.1 <- vcovHC(fixed_reg_SEA_global_SWIID_add,
                        method = "arellano", type = "HC1") %>% diag() %>% sqrt()

robust_se_6.2 <- vcovHC(fixed_reg_SEA_global_WID_add,
                        method = "arellano", type = "HC1") %>% diag() %>% sqrt()


## Each model is shown with default and then with robust standard errors.
## The robust columns must use the standard errors computed for these two
## models (robust_se_6.1 / robust_se_6.2), not those of the high/low NEOPAT
## models from the previous table.
stargazer(list(Mod6.1, Mod6.1,
               Mod6.2, Mod6.2),
          type = "html",
          se = list(NULL, robust_se_6.1,
                    NULL, robust_se_6.2),
          title = "Two-way fixed effects with additional interaction terms",
          out = "C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Tables/additional_fixed.html",
          column.labels = c("", "robust SE",
                            "", "robust SE"),
          colnames = FALSE,
          dep.var.labels = c("SWIID-Gini", "Top 10% Income Share"),
          model.numbers = TRUE,
          df = TRUE)


## plotting effects

## print the SEA SWIID model again; presumably the source of the coefficients
## hard-coded in the plotting functions below -- re-check them after any
## re-estimation
summary(fixed_reg_SEA_global_SWIID)

## Builds the SEA curve for one fixed NEOPAT level. The returned function maps
## FDI (x) to a NEOPAT term, a linear and a squared FDI term, and the two
## FDI-by-NEOPAT interaction terms. The coefficients are hard-coded
## (presumably copied from the SEA SWIID model summary -- verify after
## re-estimation).
sea_fdi_curve <- function(neopat) {
  force(neopat)
  function(x) {
    5.2211*neopat + 0.091580*x + (-0.00017855*x^2) +
      (-0.20114*(x*neopat)) +
      (0.00076607*(x^2*neopat))
  }
}

## curves at NEOPAT = 0.02, 0.1, 0.3 and 0.8
function_SEA_no_pat <- sea_fdi_curve(0.02)

function_SEA_low_pat <- sea_fdi_curve(0.1)

function_SEA_med_pat <- sea_fdi_curve(0.3)

function_SEA_high_pat <- sea_fdi_curve(0.8)


## SEA panel of the nexus figure: the four NEOPAT-level curves over
## FDI = 0..400.
## The unnamed colour values are matched to the legend labels in the scale's
## (alphabetical) order: "high", "low", "medium", "none".
plot_SEA <- ggplot(database_SEA, aes(x = FDI)) +
            stat_function(fun = function_SEA_no_pat, aes(color = "none (0.02)"), size = 1.3) +
            stat_function(fun = function_SEA_low_pat, aes(color = "low (0.1)"), size = 1.3) +
            stat_function(fun = function_SEA_med_pat, aes(color = "medium (0.3)"), size = 1.3) +
            stat_function(fun = function_SEA_high_pat, aes(color = "high (0.8)"), size = 1.3) +
            xlim(0, 400) +
            scale_color_manual("Level Neopatrimonialism", values = c("royalblue4", "steelblue3",
                                                                     "skyblue1", "lightskyblue1")) +
            theme(legend.position = "bottom") + ylab("SWIID-Gini")


## distribution of the SWIID Gini for each SEA country
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Burma/Myanmar")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Thailand")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Cambodia")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Laos")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Vietnam")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Malaysia")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Indonesia")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Philippines")]))
with(database_SEA, summary(SWIID_Gini[which(CountryName == "Singapore")]))


## plotting for African sample

## print the Africa SWIID model again; presumably the source of the
## coefficients hard-coded in the plotting functions below -- re-check them
## after any re-estimation
summary(fixed_reg_Africa_global_SWIID)

## Builds the Africa curve for one fixed NEOPAT level. The returned function
## maps FDI (x) to a NEOPAT term, a linear and a squared FDI term, and the two
## FDI-by-NEOPAT interaction terms. The coefficients are hard-coded
## (presumably copied from the Africa SWIID model summary -- verify after
## re-estimation).
africa_fdi_curve <- function(neopat) {
  force(neopat)
  function(x) {
    0.67588*neopat + 0.060070*x + (-0.00042530*x^2) +
      (-0.11555*(x*neopat)) +
      (0.00097095*(x^2*neopat))
  }
}

## curves at NEOPAT = 0.02, 0.1, 0.3 and 0.8
function_Africa_no_pat <- africa_fdi_curve(0.02)

function_Africa_low_pat <- africa_fdi_curve(0.1)

function_Africa_med_pat <- africa_fdi_curve(0.3)

function_Africa_high_pat <- africa_fdi_curve(0.8)


## Africa panel of the nexus figure: the four NEOPAT-level curves over
## FDI = 0..150. Its own legend and y-axis label are suppressed because the
## combined figure below supplies a common legend.
## The unnamed colour values are matched to the legend labels in the scale's
## (alphabetical) order: "high", "low", "medium", "none".
plot_Africa <- ggplot(database_Africa, aes(x = FDI)) +
               stat_function(fun = function_Africa_no_pat, aes(color = "none (0.02)"), size = 1.3) +
               stat_function(fun = function_Africa_low_pat, aes(color = "low (0.1)"), size = 1.3) +
               stat_function(fun = function_Africa_med_pat, aes(color = "medium (0.3)"), size = 1.3) +
               stat_function(fun = function_Africa_high_pat, aes(color = "high (0.8)"), size = 1.3) +
               xlim(0, 150) +
               scale_color_manual("Level Neopatrimonialism", values = c("royalblue4", "steelblue3",
                                                           "skyblue1", "lightskyblue1")) +
               theme(legend.position = "none") + ylab("")


## place the SEA and Africa panels side by side with one shared legend
nexus_plot <- ggarrange(plot_SEA, plot_Africa,
                        ncol = 2, nrow = 1,
                        align = "v",
                        heights = c(2,2),
                        widths = c(2,2),
                        common.legend = TRUE,
                        legend = "bottom")

## add the overall figure title
annotate_figure(nexus_plot, top = text_grob("FDI-Inequality nexus at different levels of neopatrimonialism", 
                                      color = "black", face = "bold", size = 14))

## plot excluding Singapore

## Baseline SEA SWIID specification re-estimated without Singapore; presumably
## the source of the coefficients hard-coded in the plotting functions below.
ex_Singa <- plm(formula = SWIID_Gini ~ NEOPAT + GDPpc + GDPpc2 + 
                                    TRADE + TRADE2 + EDUC + 
                                    TELLINE + DEMOCRACY + 
                                    SECTOR + FINDEV +
                                    FDI + FDI*NEOPAT + 
                                    FDI2 + FDI2*NEOPAT, 
                                  data = subset(database_SEA, CountryName != "Singapore"), 
                                  na.action = na.exclude, 
                                  index = c("CountryName", "year"), model = "within",
                                  effect = "twoway")

summary(ex_Singa)

## Curve drawn as the "none (0.02)" line in plot_ex_Singa: hard-coded
## polynomial in x (the plotted FDI axis) with neopatrimonialism fixed at 0.02.
## Coefficients are literals; presumably taken from the ex_Singa model
## summary -- TODO confirm.
function_Singa_no_pat <- function(x) {
  neopat <- 0.02
  fdi_term <- 0.18497 * x
  fdi_sq_term <- -0.0026365 * x^2
  fdi_neopat_term <- -0.33438 * (x * neopat)
  fdi_sq_neopat_term <- 0.0038271 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## Curve drawn as the "low (0.1)" line in plot_ex_Singa: hard-coded polynomial
## in x (the plotted FDI axis) with neopatrimonialism fixed at 0.1.
## Coefficients are literals; presumably taken from the ex_Singa model
## summary -- TODO confirm.
function_Singa_low_pat <- function(x) {
  neopat <- 0.1
  fdi_term <- 0.18497 * x
  fdi_sq_term <- -0.0026365 * x^2
  fdi_neopat_term <- -0.33438 * (x * neopat)
  fdi_sq_neopat_term <- 0.0038271 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## Curve drawn as the "medium (0.3)" line in plot_ex_Singa: hard-coded
## polynomial in x (the plotted FDI axis) with neopatrimonialism fixed at 0.3.
## The level was previously 0.5, which contradicted both the legend label
## used for this curve and the parallel function_SEA_ex_sin_med_pat (0.3).
## Coefficients are literals; presumably taken from the ex_Singa model
## summary -- TODO confirm.
function_Singa_med_pat <- function(x) {
  neopat <- 0.3
  fdi_term <- 0.18497 * x
  fdi_sq_term <- -0.0026365 * x^2
  fdi_neopat_term <- -0.33438 * (x * neopat)
  fdi_sq_neopat_term <- 0.0038271 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## Curve drawn as the "high (0.8)" line in plot_ex_Singa: hard-coded polynomial
## in x (the plotted FDI axis) with neopatrimonialism fixed at 0.8.
## Coefficients are literals; presumably taken from the ex_Singa model
## summary -- TODO confirm.
function_Singa_high_pat <- function(x) {
  neopat <- 0.8
  fdi_term <- 0.18497 * x
  fdi_sq_term <- -0.0026365 * x^2
  fdi_neopat_term <- -0.33438 * (x * neopat)
  fdi_sq_neopat_term <- 0.0038271 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## SEA-without-Singapore figure: one curve per neopatrimonialism level over
## FDI in [0, 100], with its own legend at the bottom.
## Colours are given as a named vector so that each label gets a fixed colour
## and the shades darken with the neopatrimonialism level. With the previous
## unnamed vector ggplot2 matched colours to the alphabetically sorted labels,
## which drew "low" darker than "medium"; the mapping below is the one
## plot_SEA_ex_sin ends up with.
## NOTE(review): the legend labels state the NEOPAT level each curve is meant
## to use -- keep them in sync with the function_Singa_*_pat definitions.
plot_ex_Singa <- ggplot(database_SEA, aes(x = FDI)) +
  stat_function(fun = function_Singa_no_pat, aes(color = "none (0.02)"), size = 1.3) +
  stat_function(fun = function_Singa_low_pat, aes(color = "low (0.1)"), size = 1.3) +
  stat_function(fun = function_Singa_med_pat, aes(color = "medium (0.3)"), size = 1.3) +
  stat_function(fun = function_Singa_high_pat, aes(color = "high (0.8)"), size = 1.3) +
  xlim(0, 100) +
  scale_color_manual(
    "Level Neopatrimonialism",
    values = c(
      "none (0.02)" = "lightskyblue1",
      "low (0.1)" = "skyblue1",
      "medium (0.3)" = "steelblue3",
      "high (0.8)" = "royalblue4"
    )
  ) +
  theme(legend.position = "bottom") + ylab("SWIID-Gini") +
  ggtitle("Plot SEA excluding Singapore")



## subsets for interpretation

## SEA rows from 2020 with NEOPAT above 0.3
SEA_flip <- subset(database_SEA, NEOPAT > 0.3 & year == 2020)

## distribution of NEOPAT in each regional sample
summary(database_SEA[["NEOPAT"]])

summary(database_Africa[["NEOPAT"]])

## FDI distribution for Singapore alone and for all other SEA rows
summary(subset(database_SEA, CountryName == "Singapore")[["FDI"]])

summary(subset(database_SEA, CountryName != "Singapore")[["FDI"]])

## Africa rows from 2020 with NEOPAT above 0.5
Africa_flip <- subset(database_Africa, NEOPAT > 0.5 & year == 2020)

## all Africa rows from 2020
Af_no_countries <- subset(database_Africa, year == 2020)

## outlier testing and flipping point adjustment

## inspect the FDI distribution in the SEA sample
summary(database_SEA[["FDI"]])

## (argument kept as database_SEA$FDI: hist() uses it for the default title)
hist(database_SEA$FDI)

## SEA rows with FDI above 150
SEA_outliers <- subset(database_SEA, FDI > 150)

## Within (fixed-effects) panel regression on the SEA rows whose CountryName
## is not "Singapore"; same formula and subset as ex_Singa above.
fixed_reg_SEA_global_SWIID_ex_sin <- plm(
  SWIID_Gini ~ NEOPAT +
    GDPpc + GDPpc2 +
    TRADE + TRADE2 +
    EDUC + TELLINE + DEMOCRACY + SECTOR + FINDEV +
    FDI + FDI*NEOPAT +
    FDI2 + FDI2*NEOPAT,
  data = subset(database_SEA, CountryName != "Singapore"),
  index = c("CountryName", "year"),
  model = "within",
  effect = "twoway",
  na.action = na.exclude
)

summary(fixed_reg_SEA_global_SWIID_ex_sin)


## Curve drawn as the "none (0.02)" line in plot_SEA_ex_sin: hard-coded
## polynomial in x (the plotted FDI axis) with neopatrimonialism fixed at 0.02.
## Coefficients are literals; presumably taken from the
## fixed_reg_SEA_global_SWIID_ex_sin summary -- TODO confirm.
function_SEA_ex_sin_no_pat <- function(x) {
  neopat <- 0.02
  fdi_term <- 0.17702 * x
  fdi_sq_term <- -0.0025048 * x^2
  fdi_neopat_term <- -0.32343 * (x * neopat)
  fdi_sq_neopat_term <- 0.0036373 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## Curve drawn as the "low (0.1)" line in plot_SEA_ex_sin: hard-coded
## polynomial in x (the plotted FDI axis) with neopatrimonialism fixed at 0.1.
## Coefficients are literals; presumably taken from the
## fixed_reg_SEA_global_SWIID_ex_sin summary -- TODO confirm.
function_SEA_ex_sin_low_pat <- function(x) {
  neopat <- 0.1
  fdi_term <- 0.17702 * x
  fdi_sq_term <- -0.0025048 * x^2
  fdi_neopat_term <- -0.32343 * (x * neopat)
  fdi_sq_neopat_term <- 0.0036373 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## Curve drawn as the "medium (0.3)" line in plot_SEA_ex_sin: hard-coded
## polynomial in x (the plotted FDI axis) with neopatrimonialism fixed at 0.3.
## Coefficients are literals; presumably taken from the
## fixed_reg_SEA_global_SWIID_ex_sin summary -- TODO confirm.
function_SEA_ex_sin_med_pat <- function(x) {
  neopat <- 0.3
  fdi_term <- 0.17702 * x
  fdi_sq_term <- -0.0025048 * x^2
  fdi_neopat_term <- -0.32343 * (x * neopat)
  fdi_sq_neopat_term <- 0.0036373 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## Curve drawn as the "high (0.8)" line in plot_SEA_ex_sin: hard-coded
## polynomial in x (the plotted FDI axis) with neopatrimonialism fixed at 0.8.
## Coefficients are literals; presumably taken from the
## fixed_reg_SEA_global_SWIID_ex_sin summary -- TODO confirm.
function_SEA_ex_sin_high_pat <- function(x) {
  neopat <- 0.8
  fdi_term <- 0.17702 * x
  fdi_sq_term <- -0.0025048 * x^2
  fdi_neopat_term <- -0.32343 * (x * neopat)
  fdi_sq_neopat_term <- 0.0036373 * (x^2 * neopat)
  fdi_term + fdi_sq_term + fdi_neopat_term + fdi_sq_neopat_term
}

## SEA (excluding Singapore) figure: one curve per neopatrimonialism level
## over FDI in [0, 100], legend at the bottom.
## Colours are named explicitly; the assignment is the one the unnamed vector
## produced (values matched to the alphabetically sorted labels).
plot_SEA_ex_sin <- ggplot(data = database_SEA, mapping = aes(x = FDI)) +
  stat_function(mapping = aes(color = "none (0.02)"),
                fun = function_SEA_ex_sin_no_pat, size = 1.3) +
  stat_function(mapping = aes(color = "low (0.1)"),
                fun = function_SEA_ex_sin_low_pat, size = 1.3) +
  stat_function(mapping = aes(color = "medium (0.3)"),
                fun = function_SEA_ex_sin_med_pat, size = 1.3) +
  stat_function(mapping = aes(color = "high (0.8)"),
                fun = function_SEA_ex_sin_high_pat, size = 1.3) +
  xlim(0, 100) +
  scale_color_manual(
    name = "Level Neopatrimonialism",
    values = c(
      "high (0.8)" = "royalblue4",
      "low (0.1)" = "skyblue1",
      "medium (0.3)" = "steelblue3",
      "none (0.02)" = "lightskyblue1"
    )
  ) +
  ylab("SWIID-Gini") +
  ggtitle("FDI-Inequality nexus at different levels of neopatrimonialism (SEA)") +
  theme(legend.position = "bottom")


## control variable exploration

## Flag democratization events: DEMOCRACY equals 1 in a row and equalled 0 in
## the same country's previous observation (1 = event, 0 = no event, NA where
## the comparison is undefined, e.g. a country's first observation).
## The previous value is taken within CountryName after ordering by year, so a
## country's first row is never compared with the last row of whichever
## country happens to precede it in database_full, and the result does not
## depend on the row order or on which package's lag() is on the search path.
database_full$democratization <- local({
  democracy <- database_full$DEMOCRACY
  country <- database_full$CountryName
  by_country_year <- order(country, database_full$year)

  ## previous observation of DEMOCRACY within each country (NA for the first)
  previous <- rep(NA, length(democracy))
  previous[by_country_year] <- ave(
    democracy[by_country_year],
    country[by_country_year],
    FUN = function(v) c(NA, v[-length(v)])
  )

  ifelse(democracy == 1 & previous == 0, 1, 0)
})

## democratization events after 1990, overall and for two e_regionpol codes
## (4 and 7; going by the object names these are the Africa and SEA regions --
## confirm against the V-Dem codebook)
democratization <- subset(database_full, subset = democratization == 1 & year > 1990)

democratization_Africa <- subset(democratization, e_regionpol == 4)

democratization_SEA <- subset(democratization, e_regionpol == 7)

## summary table

## descriptive statistics of the main variables in the full dataset
summary(database_full[["NEOPAT"]])

summary(database_full[["FDI"]])

summary(database_full[["SWIID_Gini"]])

summary(database_full[["TOP10_SHARE"]])

summary(database_full[["GDPpc"]])

summary(database_full[["TRADE"]])

summary(database_full[["SECTOR"]])

summary(database_full[["EDUC"]])


## GOV_CAP is summarised from Africa_ex_out (created elsewhere in this script)
summary(Africa_ex_out[["GOV_CAP"]])


## provide datasets 

## Export each analysis dataset to the CSV file that carries its name.
## Previously all three calls wrote database_full, so database_SEA.csv and
## database_Africa.csv contained the full dataset instead of the regional ones.
## NOTE(review): the target paths are absolute and machine-specific; adjust
## them before running on another computer.
write.csv(database_full,"C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Data//database_full.csv", row.names = FALSE)

write.csv(database_SEA,"C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Data//database_SEA.csv", row.names = FALSE)

write.csv(database_Africa,"C:/Users/philipp.becker/Documents/LSE/Dissertation/Hand in/Data//database_Africa.csv", row.names = FALSE)



